import json
import geoip2.database
import statistics as st
import pprint
import numpy as np
import os
from enum import IntEnum
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import math
from pathlib import Path
from collections import Counter, defaultdict

import crawleth.parse_data as parse_data
import analysis.utils.countries_codes as country_utils


class Continent:
    """Accumulates per-country node counts for one continent."""

    def __init__(self, continent_name):
        self.continent_name = continent_name
        # country -> node count (fixed: original annotation `dict(int)` was not a valid type)
        self.dict: dict[str, int] = {}
        self.continent_count = 0

    def add_country_count(self, country, count):
        """Add `count` nodes to `country`'s tally and the continent total.

        Bug fix: the original guarded with `if country in self.dict`, but the
        dict starts empty, so no count was ever recorded.
        """
        self.dict[country] = self.dict.get(country, 0) + count
        self.continent_count += count

    def __str__(self):
        """Return a summary: continent total plus the ten most common countries.

        Bug fix: the original printed instead of returning, referenced the
        undefined name `d`, and sorted ascending (listing the *least* common
        countries despite the variable name).
        """
        lines = [f"{self.continent_name} count = {self.continent_count}"]
        most_common_countries = sorted(self.dict, key=lambda c: self.dict[c], reverse=True)
        for most_common in most_common_countries[:10]:
            lines.append(f"\t{most_common} count = {self.dict[most_common]}")
        return "\n".join(lines)

    # Same human-readable form is fine for debugging output.
    __repr__ = __str__

class SubnetDetails:
    """IP addresses observed inside one subnet prefix, plus per-country and
    per-AS counts filled in by update_countries()."""

    def __init__(self, subnet: str):
        self.subnet = subnet
        self.ips = set()                    # IP addresses seen in this subnet
        self.number_ips = 0                 # len(self.ips); set by update_countries()
        self.countries = defaultdict(int)   # country name -> IP count
        self.asn = defaultdict(int)         # ASN string -> IP count
        self.AS = {}                        # ASN string -> AS organisation name

    def add_ip(self, ip: str):
        """Record one IP address belonging to this subnet."""
        self.ips.add(ip)

    def update_countries(self, raw_results):
        """Fill country/AS counters by looking up each IP in the raw crawls.

        raw_results: list of crawl result JSON filenames; each file has an
        "up" list of node dicts carrying "IP address"/"country"/"asn"/"as".

        Perf fix: the original re-opened and re-parsed every raw result file
        for each IP (O(ips * files) JSON loads). We now build an ip -> node
        index in a single pass; first match wins, scanning files in the given
        order, exactly as before.
        """
        self.number_ips = len(self.ips)
        node_by_ip = {}
        for raw_result_file in raw_results:
            with open(raw_result_file) as fp:
                raw_result_json = json.load(fp)
            for node in raw_result_json["up"]:
                # setdefault keeps the first occurrence, matching the
                # original early-exit search order.
                node_by_ip.setdefault(node["IP address"], node)
        for ip in tqdm(self.ips, leave=False):
            node = node_by_ip.get(ip)
            if node is not None:
                self.countries[node["country"]] += 1
                self.asn[node["asn"]] += 1
                self.AS[node["asn"]] = node["as"]
            else:
                print(f"{ip} not found, strange !")

    def __str__(self):
        string = f"Subnet {self.subnet} contains {self.number_ips} IPs\n"
        for key in self.countries:
            string += f"\tCountry {key} contains {self.countries[key]} IPs\n"
        for key in self.asn:
            string += f"\tASN {key} ({self.AS[key]}) contains {self.asn[key]} IPs\n"
        return string

    def _is_valid_operand(self, other):
        # Duck-typed check: anything exposing `subnet` can be compared.
        return hasattr(other, "subnet")

    def __eq__(self, other):
        if not self._is_valid_operand(other):
            return NotImplemented
        return self.subnet == other.subnet

def add_geo_to_nodes(raw_results_json: list[str]):
    """Annotate every up node in each crawl file with GeoIP data (AS name,
    ASN, city, country, coordinates), rewriting each file in place.

    IPs absent from the GeoLite2 databases get None for all geo fields.
    """
    geo_keys = ("as", "asn", "city", "country", "country_code", "latitude", "longitude")
    with geoip2.database.Reader('geoip/old/GeoLite2-City.mmdb') as city_reader, \
         geoip2.database.Reader('geoip/old/GeoLite2-ASN.mmdb') as asn_reader:
        for filename in tqdm(raw_results_json):
            with open(filename, 'r') as fh:
                crawl = json.load(fh)
            for node in tqdm(crawl["up"], leave=False):
                try:
                    asn_info = asn_reader.asn(node["IP address"])
                    node["as"] = asn_info.autonomous_system_organization
                    node["asn"] = f"AS{asn_info.autonomous_system_number}"
                    city_info = city_reader.city(node["IP address"])
                    node["city"] = city_info.city.name
                    node["country"] = city_info.country.name
                    node["country_code"] = city_info.country.iso_code
                    node["latitude"] = city_info.location.latitude
                    node["longitude"] = city_info.location.longitude
                except geoip2.errors.AddressNotFoundError:
                    # Unknown IP: blank out every geo field (this also clears
                    # any fields set before the lookup failed).
                    for key in geo_keys:
                        node[key] = None
            with open(filename, 'w') as fh:
                json.dump(crawl, fh)

def print_node_geo(ips: list[str]):
    """Print AS and city/country/coordinate information for each IP.

    Bug fix: the city lookup was outside any try block, so one IP missing
    from the City database crashed the whole loop even though the ASN
    lookup was already guarded; both lookups are now handled alike.
    """
    with geoip2.database.Reader('geoip/GeoLite2-City.mmdb') as city_reader, \
         geoip2.database.Reader('geoip/GeoLite2-ASN.mmdb') as asn_reader:
        for ip in ips:
            try:
                asn_response = asn_reader.asn(ip)
                print(f"{asn_response.autonomous_system_organization}, AS{asn_response.autonomous_system_number}")
            except geoip2.errors.AddressNotFoundError:
                print("No AS info")
            try:
                city_response = city_reader.city(ip)
                print(f"{city_response.city.name}, {city_response.country.name}, {city_response.country.iso_code}, {city_response.location.latitude}, {city_response.location.longitude}")
            except geoip2.errors.AddressNotFoundError:
                print("No city info")

def print_geo_distrib(raw_results_json: list[str]):
    """Print the average per-continent and top-10 per-country distribution
    of up nodes across the given crawl files.

    Each node is attributed to the first continent whose country list
    contains its country code; averages are taken over all crawls.
    """
    per_crawl: list[Counter] = []
    for filename in tqdm(raw_results_json):
        with open(filename, 'r') as fp:
            json_obj = json.load(fp)
        crawl_counter = Counter()
        for node in json_obj['up']:
            code = node['country_code']
            for continent in country_utils.Continent:
                if code in country_utils.continents[continent.value]:
                    crawl_counter[continent.name] += 1
                    crawl_counter[code] += 1
                    break
        per_crawl.append(crawl_counter)
    # Continent names and country codes share one averages dict, mirroring
    # how the counters above mix both kinds of keys.
    averages: dict[str, float] = {}
    for continent in country_utils.Continent:
        averages[continent.name] = round(st.mean([c[continent.name] for c in per_crawl]), 1)
        country_means = {
            code: round(st.mean([c[code] for c in per_crawl]), 1)
            for code in country_utils.continents_countries[continent]
        }
        # Keep only the ten most common countries of this continent.
        for code in sorted(country_means, key=country_means.get, reverse=True)[:10]:
            averages[code] = country_means[code]
    for continent in country_utils.Continent:
        if averages[continent.name] == 0:
            continue
        print(f"{continent} count = {averages[continent.name]}")
        for code in sorted(averages, key=averages.get, reverse=True):
            if code in country_utils.continents_countries[continent]:
                print(f"\t{code} count = {averages[code]}")

def discovered_nodes_per_sec(crawl_json):
    """Plot cumulative discovered/up node counts over time, then the
    per-second discovery rate, saving both figures as PDFs and showing them.
    """
    with open(crawl_json, "r") as f:
        data = json.load(f)

    # Second element of each pair goes on the time axis below — assumed
    # from the axis labels; confirm against parse_data.
    pending_counts, pending_times = parse_data.extract_pending(data)
    up_counts, up_times = parse_data.extract_up(data)
    plt.plot(pending_times, pending_counts)
    plt.plot(up_times, up_counts)

    plt.xlabel('Elapsed time (s)')
    plt.ylabel('Number of nodes')
    plt.legend(["discovered nodes", "up nodes"])
    plt.tight_layout()
    plt.savefig("cumulative_nodes_discovered_walle.pdf", facecolor='none', edgecolor='none')
    plt.show()

    df = parse_data.extract_pending_per_s(data)
    sns.lineplot(x=df.index, y="number of nodes", data=df)
    plt.xlabel('Elapsed time (s)')
    plt.ylabel('Number of nodes')
    plt.tight_layout()
    plt.savefig("nodes_discovered_per_sec_walle.pdf", facecolor='none', edgecolor='none')
    plt.show()


class ChurnPeriod(IntEnum):
    ONEHOUR = 1
    TWOHOUR = 2
    FOURHOUR = 4
    EIGHTHOUR = 8
    ONEDAY = 24


def _gen_churn_result_filename(files: list[str], period=ChurnPeriod.ONEDAY):
    start = os.path.splitext(os.path.basename(files[-1]))[0].split('_')[-1]
    end = os.path.splitext(os.path.basename(files[0]))[0].split('_')[-1]
    result_filename = f"churn_{start}--{end}--{period.name.lower()}.json"
    return Path('data') / result_filename


def churn(raw_results_json: list[str], period=ChurnPeriod.ONEDAY):
    """Compute node churn between consecutive periods of `period` hours.

    Hourly crawl files are grouped `period` at a time (sorted, consumed
    oldest-first); for each pair of consecutive periods the function prints
    (messages in French, kept as-is) and records the share of nodes that
    left and that (re)joined, then writes the result JSON under data/.

    raw_results_json: hourly crawl result files named ..._<timestamp>.json.
    """
    files = sorted(raw_results_json, reverse=True)
    result_filename = _gen_churn_result_filename(files, period)
    files_number = period  # IntEnum: number of hourly files per period
    nodes_sets = []
    pbar = tqdm(total=len(files))
    while files:
        # Bug fix: the last group may hold fewer than `files_number` files
        # when len(files) is not a multiple of the period; the original
        # popped past the end and raised IndexError. Clamp the batch size
        # (and report the true progress increment).
        batch = min(files_number, len(files))
        pbar.update(batch)
        current = os.path.basename(files[-1])
        current = os.path.splitext(current)[0]
        current = current.split('_')[-1]
        if period == ChurnPeriod.ONEDAY:
            # Keep only the YYYY-MM-DD part of the timestamp.
            current = '-'.join(current.split('-')[0:3])
        else:
            # Re-insert the space between date and time.
            current = current[:10] + ' ' + current[11:]
        nodes_set = set()
        for _ in range(batch):
            filename = files.pop()
            with open(filename, 'r') as f:
                nodes = json.load(f)['up']
            for node_info in nodes:
                nodes_set.add(f"{node_info['IP address']}-{node_info['UDP port']}")
        nodes_sets.append((f'{current}', nodes_set))
    pbar.close()
    missing_list = []
    new_list = []
    result = {}
    for (day_i, nodes_set_i), (day_j, nodes_set_j) in zip(nodes_sets, nodes_sets[1:]):
        print(f'Période : {day_i} / {day_j}')
        print(f'Nombre noeuds : {len(nodes_set_i)}/{len(nodes_set_j)}')
        missing = len(nodes_set_i - nodes_set_j)
        print(f'Présent dans le set1 et pas dans le set2 : {missing}')
        missing_pct = missing/len(nodes_set_i)*100
        missing_list.append(missing_pct)
        missing_pct = round(missing_pct, 2)
        print(f'Noeuds manquants : {missing_pct}%')
        new = len(nodes_set_j - nodes_set_i)
        print(f'Présent dans le set2 et pas dans le set1 : {new}')
        new_pct = new/len(nodes_set_j)*100
        new_list.append(new_pct)
        new_pct = round(new_pct, 2)
        print(f'Nouveaux noeuds: {new_pct}%\n')
        result[f'{day_i}–{day_j}'] = {
            'Nodes leaving the network': missing_pct,
            'Nodes (re)joining the network': new_pct
        }
    # NOTE(review): sorted(...)[-2] below assumes at least two period pairs;
    # a two-period input would raise IndexError here — confirm intended.
    print(f'Min missing {round(min(missing_list),2)}%, '
          f'Max missing {round(max(missing_list),2)}% '
          f'(or {round(sorted(missing_list)[-2],2)}), '
          f'Mean missing {round(st.mean(missing_list),2)}% (or '
          f'{round(st.mean(sorted(missing_list)[:-1]),2)})')
    print(f'Min new {round(min(new_list),2)}%, Max new {round(max(new_list),2)}%, '
          f'Mean new {round(st.mean(new_list),2)}%')
    with open(result_filename, 'w') as f:
        json.dump(result, f)


def display_churn(json_file: str, nth_x_axis=1):
    """Plot the leaving/(re)joining rates produced by churn().

    json_file: output of churn(); nth_x_axis: number of x-axis tick marks
    to keep (spread evenly over the time range).
    """
    with open(json_file, 'r') as f:
        churn_data = json.load(f)
    df = pd.DataFrame.from_dict(churn_data, orient='index')
    df.columns = ['Nodes leaving the network', 'Nodes (re)joining the network']
    print(df)
    fig, ax = plt.subplots(figsize=(11,6))
    sns.lineplot(data=df, sort=False)
    ax.set(xlabel='Time', ylabel='Rate (%)')
    tick_labels = ax.get_xticklabels()
    print(len(tick_labels))
    last = len(tick_labels) - 1
    ax.set_ylim(ymin=0, ymax=20) # 50 for oneday
    print(last, nth_x_axis)
    # Evenly spaced subset of tick positions.
    ticks = np.linspace(0, last, nth_x_axis).astype(int).tolist()
    print(ticks)
    ax.set_xticks(ticks)
    fig.autofmt_xdate()
    plt.tight_layout()
    plt.savefig("churn.pdf", facecolor='none', edgecolor='none')
    plt.show()


def nodes_numbers_stats(raw_results_json: list[str]):
    """Print min/max/mean/median of the per-crawl up-node count, plus the
    overall number of distinct node IDs and distinct IP:port endpoints.

    Filenames are expected to end in _<date>; the date of the first crawl
    reaching a given count is reported for the min/max lines.
    """
    count_to_date = {}
    up_counts = []
    seen_nodeids = set()
    seen_endpoints = set()
    start = raw_results_json[0].split('_')[-1]
    end = raw_results_json[-1].split('_')[-1]
    for filename in tqdm(raw_results_json):
        with open(filename, 'r') as fp:
            crawl = json.load(fp)
        date = filename.split('_')[-1]
        up_count = crawl["stats"]["up_count"]
        # setdefault keeps the first date at which this count was observed.
        count_to_date.setdefault(up_count, date)
        up_counts.append(up_count)
        for node in crawl["up"]:
            seen_nodeids.update(node["Seen node IDs"])
            seen_endpoints.add(f'{node["IP address"]}:{node["UDP port"]}')
    print(f"From {start} to {end}")
    print(f"Min : {min(up_counts)} occured during a crawl during the hour preceding {count_to_date[min(up_counts)]}")
    print(f"Max : {max(up_counts)} occured during a crawl during the hour preceding {count_to_date[max(up_counts)]}")
    print(f"Mean : {st.mean(up_counts)}")
    print(f"Median : {st.median(up_counts)}")
    print(f"Total number of unique Node IDs : {len(seen_nodeids)}")
    print(f"Total number of unique IP:port  : {len(seen_endpoints)}")


def subnets_stats(subnets_json: list[str], raw_data_json: list[str]):
    """Print statistics about /24 subnets that host many crawled nodes.

    subnets_json: hourly files shaped {"threshold": int,
        "results": {prefix: {last_byte: ports}}} — shape assumed from the
        accesses below; TODO confirm against the producer.
    raw_data_json: raw crawl files used to geolocate the subnets' IPs.

    Only prefixes with at least `threshold` distinct last bytes are kept;
    details are printed largest subnet first.
    """
    unique_ips = set()
    numbers_ips = []
    numbers_subnets = []
    subnets = set()
    subnets_with_ports = set()
    subnets_details = {}
    for filename in tqdm(subnets_json):
        with open(filename, 'r') as fp:
            json_obj = json.load(fp)
        number_ips = 0
        threshold = json_obj["threshold"]
        objs = json_obj["results"]
        numbers_subnets.append(len(objs))
        for prefix in objs.keys():
            last_bytes = objs[prefix].keys()
            subnets_with_ports.add(prefix)
            if len(last_bytes) < threshold:
                continue
            subnets.add(prefix)
            # Reuse the accumulator across hourly files for the same prefix.
            if prefix in subnets_details:
                subnet_details = subnets_details[prefix]
            else:
                subnet_details = SubnetDetails(prefix)
            for last_byte in last_bytes:
                number_ips += 1
                ip = f"{prefix}.{last_byte}"
                subnet_details.add_ip(ip)
                unique_ips.add(ip)
            subnets_details[prefix] = subnet_details
        numbers_ips.append(number_ips)
    print(f"Threshold : {threshold} IP addresses in subnet")
    print(f"Number of total unique nodes in /24 subnets exceeding 24 ip : {len(unique_ips)}")
    print(f"Average number of /24 subnets that contain more than 24 nodes (only ip not ip-port) : {round(st.mean(numbers_subnets),2)}")
    # Bug fix: the original printed a stray "f" before the set ("... : f{subnets}").
    print(f"Subnets (only ip) : {subnets}")
    print(f"Subnets (ip and ip-port) : {subnets_with_ports} ({len(subnets_with_ports)})")
    print(f"Average number of IP addresses per hour : {round(st.mean(numbers_ips),2)}")
    print(f"Median number of IP addresses per hour : {st.median(numbers_ips)}")
    print("\n\n\n")
    for subnet_details in subnets_details.values():
        subnet_details.update_countries(raw_data_json)
    subnets_details = dict(sorted(subnets_details.items(), key=lambda item: item[1].number_ips, reverse=True))
    for subnet_details in subnets_details.values():
        print(subnet_details)


def aliases_stats(aliases_json: list[str], raw_results: list[str]):
    """Print statistics about IPs holding more than `threshold` node IDs
    ("aliases"), enriched with country/AS info from the raw crawls.

    aliases_json: hourly files {"threshold": int, "results": {ip: [ids]}}.
    raw_results: matching hourly crawl files, consumed one per aliases file
    in order — shapes assumed from the accesses below; TODO confirm.

    Bug fixes: the original reversed and drained the caller's `raw_results`
    list in place (we now work on a copy), and re-opened/re-parsed the same
    raw result file for every IP (now loaded once per aliases file).
    """
    pending_raw = list(raw_results)
    pending_raw.reverse()
    unique_ips = set()
    numbers_aliases = []
    aliases = set()
    ip_nodeid = defaultdict(set)
    number_ips = []
    for filename in tqdm(aliases_json):
        with open(filename, 'r') as fp:
            json_obj = json.load(fp)
        threshold = json_obj["threshold"]
        objs = json_obj["results"]
        number_ips.append(len(objs.keys()))
        aliases_tmp = set()
        raw_result_file = pending_raw.pop()
        # Hoisted out of the per-IP loop: parse the crawl file once.
        with open(raw_result_file) as fp:
            raw_result_json = json.load(fp)
        for ip in tqdm(objs.keys(), leave=False):
            node = None
            for candidate in raw_result_json["up"]:
                if candidate["IP address"] == ip:
                    node = candidate
                    break
            if node is not None:
                ip_nodeid[f"{ip} ({node['country']}, {node['as']}-{node['asn']})"].update(objs[ip])
                aliases.update(objs[ip])
                aliases_tmp.update(objs[ip])
            else:
                print("Not FOUND, strange")
        numbers_aliases.append(len(aliases_tmp))
        unique_ips.update(objs)
    print(f"Threshold : {threshold} max NodeID per IP address")
    print(f"Number of total unique IP that hold more than {threshold} Node ID : {len(unique_ips)}")
    print(f"Average number of Node ID held by IP addresses per hour : {round(st.mean(numbers_aliases),2)}")
    print(f"Median number of Node ID held by IP addresses per hour : {st.median(numbers_aliases)}")
    print(f"Average number of IP per hour that hold more than {threshold} NodeID : {round(st.mean(number_ips),2)}")
    print(f"Total number of unique Node ID : {len(aliases)}")
    ip_nodeid_counter = Counter()
    for ip, node_ids in ip_nodeid.items():
        ip_nodeid_counter[ip] += len(node_ids)
    print(ip_nodeid_counter.most_common(None))


def aliases_per_as(all_aliases_filename: str):
    """Aggregate alias counts per AS and per country, and count distinct
    IPs per AS, printing the 30 largest of each.

    The input JSON holds {"aliases": [[label, count], ...]} where each
    label looks like "<ip> (<country>, <AS org>-<ASN>)" — shape assumed
    from the slicing below; TODO confirm against the producer.
    """
    per_as = defaultdict(int)
    per_country = defaultdict(int)
    ips_per_as = defaultdict(int)
    with open(all_aliases_filename, 'r') as f:
        entries = json.load(f)['aliases']
    for entry in entries:
        label, count = entry[0], entry[1]
        head, tail = label.split(",", 1)
        as_label = tail[:-1]                        # drop the trailing ')'
        per_as[as_label] += count
        per_country[head.split('(')[1]] += count    # text after the '('
        ips_per_as[as_label.split('(')[0]] += 1     # one IP per entry
    pprint.pprint(Counter(per_as).most_common(30))
    pprint.pprint(Counter(per_country).most_common(30))
    pprint.pprint(Counter(ips_per_as).most_common(30))


def ip_per_as(raw_results: list[str]):
    """Count unique IP addresses per AS across all crawls and print the 30
    largest ASes plus overall totals."""
    as_to_ips = defaultdict(set)
    for filename in tqdm(raw_results):
        with open(filename, 'r') as fp:
            crawl = json.load(fp)
        for node_info in tqdm(crawl["up"], leave=False):
            as_to_ips[f"{node_info['as']}-{node_info['asn']}"].add(node_info['IP address'])
    # Renamed from `ip_per_as`: the original local shadowed the function name.
    as_ip_counts = defaultdict(int)
    all_ips = set()
    total_ips = 0
    for as_name, ip_set in as_to_ips.items():
        as_ip_counts[as_name] = len(ip_set)
        all_ips.update(ip_set)
        total_ips += len(ip_set)
    pprint.pprint(Counter(as_ip_counts).most_common(30))
    print(f"Number of IP: {total_ips} or {len(all_ips)}, total number of ASes: {len(as_ip_counts.keys())}")


def aliases_stats2(raw_results_json: list[str]):
    """Print node/IP/alias totals over all crawl files.

    Perf fix: the original kept aliases in a list and did an O(n) `in`
    scan per node ID (accidental O(n^2)); the membership check only
    deduplicated, so a set is equivalent and O(1) per ID.
    """
    unique_ips = set()
    nodes_count = 0
    aliases = set()
    aliases_count = 0
    for filename in tqdm(raw_results_json):
        with open(filename, 'r') as fp:
            json_obj = json.load(fp)
        up_nodes = json_obj["up"]
        nodes_count += len(up_nodes)
        for node_info in up_nodes:
            unique_ips.add(node_info["IP address"])
            seen_ids = node_info["Seen node IDs"]
            aliases.update(seen_ids)
            aliases_count += len(seen_ids)
    print(f"Nodes count: {nodes_count}, unique IPs: {len(unique_ips)}, unique aliases : {len(aliases)}, aliases count (possible duplicated): {aliases_count}")


def nodeid_number_stats(raw_results_json: list[str]):
    """Print min/max/mean/median of the number of seen node IDs per crawl,
    plus the overall number of distinct node IDs.

    Filenames are expected to end in _<date>; the date of the first crawl
    reaching a given count is reported for the min/max lines.
    """
    count_to_date = {}
    per_crawl_counts = []
    all_nodeids = set()
    start = raw_results_json[0].split('_')[-1]
    end = raw_results_json[-1].split('_')[-1]
    for filename in tqdm(raw_results_json):
        with open(filename, 'r') as fp:
            crawl = json.load(fp)
        date = filename.split('_')[-1]
        crawl_total = 0
        for node in crawl["up"]:
            seen_ids = node["Seen node IDs"]
            all_nodeids.update(seen_ids)
            crawl_total += len(seen_ids)
        # setdefault keeps the first date at which this count was observed.
        count_to_date.setdefault(crawl_total, date)
        per_crawl_counts.append(crawl_total)
    print(f"From {start} to {end}")
    print(f"Total number of unique NodeID found : {len(all_nodeids)}")
    print(f"Min : {min(per_crawl_counts)} occured during a crawl during the hour preceding {count_to_date[min(per_crawl_counts)]}")
    print(f"Max : {max(per_crawl_counts)} occured during a crawl during the hour preceding {count_to_date[max(per_crawl_counts)]}")
    print(f"Mean : {st.mean(per_crawl_counts)}")
    print(f"Median : {st.median(per_crawl_counts)}")


def plot_nodeid_concentration(raw_results_json: list[str]):
    """Plot how concentrated node IDs are in the keyspace: the distribution
    of common-prefix lengths between adjacent (sorted) node IDs, against
    the theoretical distribution for uniformly random IDs. Unusually long
    shared prefixes between neighbours suggest Sybil-generated IDs.

    raw_results_json: crawl result files. NOTE(review): each entry of
    "Seen node IDs" is unpacked here as a pair whose second element is the
    ID hash (hex string), while other functions in this file treat it as a
    flat list of IDs — confirm the expected shape.
    """
    neighbors_len_prefix = []
    nodeids = set()
    for filename in tqdm(raw_results_json):
        with open(filename, 'r') as fp:
            json_obj = json.load(fp)
        for node_info in json_obj["up"]:
            nodeids.update([nodeid_hash for _, nodeid_hash in node_info["Seen node IDs"]])
        # sorted_nodeids.sort(reverse=True)
    # Lexicographic sort of the hex strings is assumed to match numeric
    # order (fixed-width, consistent case) — TODO confirm; adjacent entries
    # are then the closest IDs in the 256-bit keyspace.
    nodeids = sorted(nodeids)
    for n1, n2 in zip(nodeids, nodeids[1:]):
        # XOR of two neighbours rendered as a 256-bit binary string: the
        # number of leading zeros equals their common prefix length in bits.
        xor_result = format((int(n1, 16) ^ int(n2, 16)), '0256b')
        leading_zeros = len(xor_result) - len(xor_result.lstrip("0"))
        neighbors_len_prefix.append(leading_zeros)
    neighbors_len_prefix_counter = Counter(neighbors_len_prefix)
    print(len(nodeids), math.floor(math.log2(len(nodeids))))
    # One past the longest observed common prefix: upper bound of the x axis.
    last_value = sorted(neighbors_len_prefix_counter.keys())[-1] + 1
    values = []
    for i in range(0, last_value):
        # Expected count of neighbour pairs with an i-bit common prefix for
        # uniformly random IDs: N, halved for every bit beyond log2(N).
        d = {
            'x': i,
            'y': len(nodeids)/2**(i-math.floor(math.log2(len(nodeids)))),
        }
        values.append(d)
    df_theoretical = pd.DataFrame(values)

    # Measured distribution: prefix length -> number of neighbour pairs.
    values = []
    for i, prefix in sorted(neighbors_len_prefix_counter.items()):
        d = {
            'x': i,
            'y': prefix
        }
        values.append(d)
    df_sybil = pd.DataFrame(values)

    # First figure: measured counts alone (log-scale y).
    print(df_sybil.y.to_string(index=False))
    plt.yscale("log")
    plt.bar(df_sybil.x, df_sybil.y)
    plt.xlabel("Neighbours' common prefix length (bits)")
    plt.ylabel("Count")
    plt.show()

    # Second figure: measured vs theoretical overlay, saved as PDF.
    fig, ax = plt.subplots()
    plt.yscale("log")
    plt.plot(df_theoretical.x, df_theoretical.y, label="Theoretical")
    plt.bar(df_sybil.x, df_sybil.y, alpha=0.5, label="Measured")
    plt.xlabel("Neighbours' common prefix length (bits)")
    plt.ylabel("Count")
    plt.legend(loc="upper right")
    plt.xticks(np.arange(math.floor(math.log2(len(nodeids))), last_value, 2))
    # Hard-coded axis window — presumably tuned for one specific dataset.
    ax.set_ylim(0.5, 1000000)
    ax.set_xlim(17, 38)
    plt.tight_layout()
    plt.savefig("sybil_theorical_measured.pdf", facecolor='none', edgecolor='none')
    plt.show()


def plot_avg_nodes(stats_filename: str):
    """Plot the daily average number of up nodes from an aggregated stats
    JSON ({"DD_MM_YYYY...": {"up_count": int}, ...}), saving the figure
    as seen_nodes.pdf."""
    with open(stats_filename, 'r') as f:
        stats_json = json.load(f)
    per_day = defaultdict(list)
    for date, stats in stats_json.items():
        parts = date.split('_')
        # Re-order DD_MM_YYYY into an ISO-like YYYY-MM-DD key.
        per_day[f"{parts[2]}-{parts[1]}-{parts[0]}"].append(stats['up_count'])
    rows = [{'x': day, 'y': st.mean(counts)} for day, counts in per_day.items()]
    df = pd.DataFrame(rows)
    print(df)
    fig, ax = plt.subplots(figsize=(11,6))
    sns.lineplot(x='x', y='y', data=df, sort=False)
    ax.set(xlabel='Date', ylabel='Number of seen nodes')
    last = len(ax.get_xticklabels()) - 1
    # Keep 15 evenly spaced date ticks.
    ticks = np.linspace(0, last, 15).astype(int).tolist()
    ax.set_xticks(ticks)
    ax.set(ylim=(0, None))
    fig.autofmt_xdate()
    plt.tight_layout()
    plt.savefig("seen_nodes.pdf", facecolor='none', edgecolor='none')
    plt.show()


def _collect_node_endpoints(raw_results: list[str]) -> set[str]:
    """Gather 'nodeid@ip:port' strings for every seen node ID in the crawls."""
    endpoints = set()
    for filename in tqdm(raw_results, leave=False):
        with open(filename) as f:
            json_obj = json.load(f)
        for node_info in tqdm(json_obj['up'], leave=False):
            for seen_nodeid in node_info['Seen node IDs']:
                endpoints.add(f"{seen_nodeid}@{node_info['IP address']}:{node_info['UDP port']}")
    return endpoints


def intersect(raw_results1: list[str], raw_results2: list[str]):
    """Compare the node populations of two crawl campaigns and print set
    sizes, the intersection, and samples of nodes unique to each side.

    The two collection loops were verbatim duplicates; extracted into
    _collect_node_endpoints.
    """
    ips1 = _collect_node_endpoints(raw_results1)
    ips2 = _collect_node_endpoints(raw_results2)

    print(f"Number of nodes in set1: {len(ips1)}")
    print(f"Number of nodes in set2: {len(ips2)}")
    ips_intersect = ips1.intersection(ips2)
    print(f"Number of nodes in intersect: {len(ips_intersect)}")
    only_in_ips2 = ips2 - ips1
    only_in_ips1 = ips1 - ips2
    print(f"Number of nodes only in set2: {len(only_in_ips2)}")
    print(f"IP in intersection: {list(ips_intersect)[:10]}")
    print("=====================================================")
    print(f"IP only in set 2: {list(only_in_ips2)[:10]}")
    print(f"IP only in set 1: {list(only_in_ips1)[:10]}")